# replication.R (this file) replicates 
# Figures 1, 4-5, B6-7, C8-C13, D14, E15-16 and 
# Tables 3, 4 C5, D6, F7, and F10.

# To replicate Tables F8-9, please follow the instructions provided on line 688 of the code 
# Please ensure the working directory is the same as that for replication.R (this file) and 
# that packages.R has been sourced and all packages installed

source("packages.R")

##################################################################################
# Map: Figure 1
##################################################################################
up.info = st_read("gadm36_IND.gpkg", layer = c("gadm36_IND_3")) %>%
    filter(NAME_1 == "Uttar Pradesh", NAME_2 == "Bahraich") %>%
    rename(District = NAME_2)

up.base = st_read("gadm36_IND.gpkg", layer = c("gadm36_IND_1")) %>% filter(NAME_1 == "Uttar Pradesh")

Figure_1 = ggplot() + geom_sf(data = up.base, size = 1, show.legend = FALSE, inherit.aes=FALSE) +
    geom_sf(data = up.info, aes(fill= District), inherit.aes=FALSE, alpha = 1, show.legend = "polygon") + scale_fill_grey(start = 0.3, end = 0.8) + theme_bw() + annotate(geom = "text", x = 81, y = 26.5, label = "Uttar Pradesh", size = 5) + theme(axis.line=element_blank(), axis.text.x=element_blank(), axis.text.y=element_blank(), axis.ticks=element_blank(), axis.title.x=element_blank(), axis.title.y=element_blank(), panel.background=element_blank(), panel.border=element_blank(), panel.grid.minor=element_blank(), plot.background=element_blank(), legend.text = element_text(size = 12), legend.title = element_text(size = 14), legend.position = "bottom")

##################################################################################
# Power Analysis: Figures B6 and B7
##################################################################################
# I) Assumptions

load("ACCESS_Household.Rda")
load("ACCESS_Village.Rda")

# I conduct the power analysis on the variable satisfaction
# 501-506 in SVP, 2.84-85 in ACCESS

#1) Pull relevant data to create variable from household, limit observations to UP
household.select <- household.df %>%
    dplyr::select(household.id = m1_q3_hhid,
                  village.code = m1_q11_village_code,
                  subdistrict = m1_q10_subdistrict,
                  state = m1_q8_state,
                  primary.lighting = m3_q83_light_main,
                  sat.adequate = m3_q84_1_light_main_adequate,
                  sat.reliable = m3_q84_2_light_main_reliable,
                  sat.cost = m3_q84_3_light_main_expensive,
                  sat.safe = m3_q84_4_light_main_safe,
                  sat.overall = m3_q85_light_main_satisfy,
                  weight, village_pop) %>%
filter(state == "UTTAR\ PRADESH") %>%
dplyr::select(-subdistrict, -state, -weight, -village_pop) %>%
mutate_at(vars(starts_with("sat")), funs(ifelse(. == "Yes", 1, 0))) %>%
mutate(solar = ifelse(primary.lighting == "Solar Home System/Lantern", 1, 0)) %>%
mutate(sat.comb = (sat.adequate+ sat.cost+ sat.safe+ sat.reliable)/4) %>%
dplyr::select(-sat.adequate, -sat.cost, -sat.safe, -sat.reliable, -sat.overall, -primary.lighting)


#2) Find mean satisfaction and se among each group
diffFunction <- function(x, y, z){
    out = rep(NA, length(x))
    for(ii in 1:length(x)){
        out[ii] = ifelse(x[ii] =="Number", NA,
                         ifelse(x[ii] == "Average", z[ii]-y[ii],
                                ifelse(x[ii] == "Variance", z[ii] + y[ii],
                                       sqrt(y[ii]^2 + z[ii]^2))))}
    return(out)}

household.select.info <- household.select %>%
    group_by(solar) %>%
    summarise(Number = n(),
              Average = mean(sat.comb),
              Variance = var(sat.comb),
              SD = sqrt(Variance),
              SE = SD/sqrt(Number)) %>%
    mutate(solar = ifelse(solar == 1, "SHS", "No_SHS")) %>%
    gather(key = "variable", value = "value", -solar) %>%
    spread(solar, value) %>%
    mutate(Diff = diffFunction(variable, No_SHS, SHS)) %>%
    mutate_at(vars(No_SHS, SHS, Diff), funs(round(., 3)))

# Within village effect
household.select %>%
    filter(solar == 0) %>%
    group_by(village.code) %>%
    summarise(var_wi = var(sat.comb), tot = n(), num_term = (tot-1) * var_wi) %>%
    summarise(sum(num_term)/(sum(tot) - 252))


#3) Parameters
# Average treatment effect
ate_mean <- household.select.info$Diff[2]
ate_sd <- household.select.info$Diff[3]
ate_se <- household.select.info$Diff[5]

# Y0 Distribution
control_mean <- household.select.info$No_SHS[2]
control_sd <- household.select.info$No_SHS[4]

# II.A) Varying treatment effect
set.seed(2138)
possible.taus <- seq(from = 0, to = .05, by= 0.002)  # The treatment sizes we'll be considering-- based on hours DV
powers <- rep(NA, length(possible.taus))         # Empty object to collect simulation estimates
alpha <- 0.05                                    # Standard significance level
sims <- 120                                      # Number of simulations to conduct for each N
N = 200                                          # Number of households


#### Outer loop to vary treatment effect ####
#### Inner loop is the simulation ####

#### Create empty vectors
significant.experiments <- rep(NA, sims) # Empty object to count significant experiments

# Used for household-specific characteristics
hhid <- seq(1:N)
Y0_t1 <- c(rep(NA, length(N)))
Y0_t2 <- c(rep(NA, length(N)))
Change = c(rep(NA, length(N)))
Assignment = c(rep(NA, length(N)))
Treatment.Effect = c(rep(NA, length(N)))
Y1_t2 <- c(rep(NA, length(N)))

# Used for round-observation
hhid2 <- c(hhid, hhid)            # hhid (all hhid, repeated)
roundval <- c(rep(1, N), rep(2, N))       # round values (1 200 times and 2 200 times)
Yval = c(rep(NA, length(hhid2)))      # y_vals (empty)
Assignment2 = c(rep(NA, length(hhid2)))   # empty vector assignment indicators
Long.Dataframe <- data.frame(Household = hhid2,
                             Round = roundval,
                             Treated = Assignment2,
                             Value = Yval)


for (j in 1:length(possible.taus)){
    # j = 15
    tau = possible.taus[j] # Pick the jth value for N, serves as treatment effect mean

    for (i in 1:sims){
        Y0_t1 <- rnorm(n = N, mean = 0, sd = 0.25)
        Change <- rnorm(n = N, mean = 0, sd = 0.05)       # change between periods
        Treatment.Effect = rnorm(n = N,
                                 mean = tau,
                                 sd = 0.02)
        Assignment <- complete_ra(N = N)          # random assignment
        Y0_t2 <- Y0_t1 + Change               # second period untreated
        Y1_t2 <- Y0_t2 + Treatment.Effect         # second period treated
        Y_t2 <- Assignment * Y1_t2 + (1-Assignment) * Y0_t2   # realized value
        Yval = c(Y0_t1, Y_t2)                 # Round_1 + Round_2 (all round 1, all round 2)
        Assignment2 <- c(rep(0, N), Assignment)       # Assignment, 200 values for zero, then assignment
        Long.Dataframe$Household <- hhid2
        Long.Dataframe$Round <- roundval
        Long.Dataframe$Treated <- Assignment2
        Long.Dataframe$Value <- Yval
        fit.sim <- lm(Value ~ Treated + factor(Round) + factor(Household),
                      data = Long.Dataframe)  # fit regression
        variance.covariance <- cluster.vcov(fit.sim, Long.Dataframe$Household) #cluster vcov
        p.value <-  coeftest(fit.sim, variance.covariance)[2,4] # Extract p-values
        significant.experiments[i] <- (p.value <= alpha) # Determine significance according to p <= 0.05
        cat("Iteration ", j, "out of ", length(possible.taus), "\n Simulation ", i)
    }       # store average success rate (power) for each tau }}
    powers[j] = mean(significant.experiments)}


# II.B) Varying sample size
set.seed(2138)
tau <- 0.1                  # The treatment sizes we'll be considering-- based on hours DV
sample_size <- seq(2, 100, by = 2)           # Number of blocks (villages)
powers2 <- rep(NA, length(sample_size))      # Empty object to collect simulation estimates
alpha <- 0.05                                    # Standard significance level
sims <- 120                                      # Number of simulations to conduct for each N

#### Create empty vectors
significant.experiments <- rep(NA, sims) # Empty object to count significant experiments
temp_ss <- c()
# Used for household-specific characteristics


for (jj in 1:length(sample_size)){

    significant.experiments <- rep(NA, sims)         # Empty object to count significant experiments
    #### Inner loop to conduct experiments "sims" times over for each N ####
    temp_ss <- sample_size[jj]
    hhid <- seq(1:temp_ss)
    Y0_t1 <- c(rep(NA, length(temp_ss)))
    Y0_t2 <- c(rep(NA, length(temp_ss)))
    Change = c(rep(NA, length(temp_ss)))
    Assignment = c(rep(NA, length(temp_ss)))
    Treatment.Effect = c(rep(NA, length(temp_ss)))
    Y1_t2 <- c(rep(NA, length(temp_ss)))
    # Used for round-observation
    hhid2 <- c(hhid, hhid) # hhid (all hhid, repeated)
    roundval <- c(rep(1, temp_ss), rep(2, temp_ss))
    Yval = c(rep(NA, length(hhid2)))        # y_vals (empty)
    Assignment2 = c(rep(NA, length(hhid2))) # empty vector assignment indicators
    Long.Dataframe <- data.frame(Household = hhid2, Round = roundval,
                                 Treated = Assignment2, Value = Yval)
    for (i in 1:sims){
        Y0_t1 <- rnorm(n = temp_ss, mean = 0, sd = 0.25)
        Change <- rnorm(n = temp_ss, mean = 0, sd = 0.05)       # change between periods
        Assignment <- complete_ra(N = temp_ss) # random assignment
        Treatment.Effect <- rnorm(n = temp_ss, mean = tau, sd = 0.02)
        Y0_t2 <- Y0_t1 + Change # second period untreated
        Y1_t2 <- Y0_t2 + Treatment.Effect # second period treated
        Y_t2 <- Assignment * Y1_t2 + (1-Assignment) * Y0_t2 # realized value
        Yval = c(Y0_t1, Y_t2) # Round_1 + Round_2 (all round 1, all round 2)
        Assignment2 <- c(rep(0, temp_ss), Assignment) # Assignment, 200 values for zero, then assignment
        Long.Dataframe$Household <- hhid2
        Long.Dataframe$Round <- roundval
        Long.Dataframe$Treated <- Assignment2
        Long.Dataframe$Value <- Yval
        fit.sim <- lm(Value ~ Treated + factor(Round) + factor(Household),
                      data = Long.Dataframe)  # fit regression
        variance.covariance <- cluster.vcov(fit.sim, Long.Dataframe$Household) #cluster vcov
        p.value <-  coeftest(fit.sim, variance.covariance)[2,4] # Extract p-values
        significant.experiments[i] <- (p.value <= alpha) # Determine significance according to p <= 0.05
        cat("Iteration ", jj, "out of ", length(sample_size), "\n Simulation ", i)       # store average success rate (power) for each N }}
        p.value <- summary(fit.sim)$coefficients[2,4]  # Extract p-values
        significant.experiments[i] <- (p.value <= alpha) # Determine significance according to p <= 0.05
    }
    powers2[jj] <- mean(significant.experiments)       # store average success rate (power) for each N
}


# Plots varying effect size

TauPower <- tibble(possible.taus, powers)
TauPower %>% mutate(diff80 = powers - .8) %>% filter(diff80 > 0) %>% filter(diff80 == min(diff80)) %>% dplyr::select(possible.taus)

maxval_effect <- as.numeric(TauPower %>% mutate(diff80 = powers - .8) %>% filter(diff80 > 0) %>% filter(diff80 == min(diff80)) %>% dplyr::select(possible.taus))

Figure_B6 <- TauPower %>% ggplot(aes(x = possible.taus, y = powers))  + geom_line() + theme_bw() +
    geom_vline(xintercept = maxval_effect) +
    #         geom_vline(xintercept = ate_mean, linetype = "dashed") +
    annotate("text", x = 0.03, y = 0.4, label = "Minimum\n detectable\n effect", size = 3.5) +
    #         annotate("text", x = 0.15, y = 0.4, label = "Difference in\n means\n (ACCESS)", size = 3.5) +
    xlab("Treatment Effect") + ylab("Power")


# Plots varying respondents
TauPower2 <- tibble(sample_size, powers2)
maxval_respondent <- as.numeric(TauPower2 %>% mutate(diff80 = powers2 - 0.8) %>% filter(diff80 > 0) %>% filter(diff80 == min(diff80)) %>% dplyr::select(sample_size))
sample_size[maxval_respondent]

Figure_B7 <- TauPower2 %>%
    ggplot(aes(x = sample_size, y = powers2))  + geom_line()  + theme_bw() + geom_vline(xintercept = 11) + annotate("text", x = 36, y = 0.5, label = "Minimum number\n respondents for\n 80% power", size = 3) +  ylab("Power")

##################################################################################
# Calculate dependent variables
##################################################################################
load("combined_data_full.Rda")

# Make all dependent variables numerics and replace NA with zero
expenditures = c("spend.kerosene.fuel", "spend.generator", "spend.lpg",
                 "spend.battery", "spend.solar", "other.lighting.source.1.expend_expend",
                 "other.lighting.source.2.expend_expend", "other.lighting.source.3.expend_expend", "expend.lighting",
                 "expend.total.three.months")

artificial.lighting =c("hrs.day.from.kerosene", "hrs.day.from.generator", "hrs.day.from.lpg",
                       "hrs.day.from.battery", "hrs.day.from.solar", "other.lighting.source.1.hrs_hrs",
                       "other.lighting.source.2.hrs_hrs", "other.lighting.source.3.hrs_hrs")

satisfaction = c("cost.sat", "safety.sat", "quality.sat",
                 "reliability.sat", "overall.sat", "adequate.reading",
                 "adequate.working", "adequate.cooking", "adequate.studying",
                 "amount.adequate", "amount.reliable", "amount.inexpensive", "amount.safe")

perceptions = c("kerosene.v.solar.quality", "kerosene.v.solar.reliability", "kerosene.v.solar.ease",
                "kerosene.v.solar.cost", "kerosene.v.solar.productivity", "solar.heard")

time.use = c("hrs.read.home", "hrs.work.home", "hrs.cook.home",
             "hrs.mobile.home", "hrs.read.home.spouse", "hrs.work.home.spouse",
             "hrs.cook.home.spouse", "hrs.mobile.home.spouse", "hrs.read.home.children",
             "hrs.work.home.children", "hrs.cook.home.children", "hrs.mobile.home.children",
             "hrs.study.home.children")


toNumerics <- c(expenditures, artificial.lighting, satisfaction, perceptions, time.use[str_detect(time.use, "children") == FALSE])

toNumericFunction <- function(x){
    y <- as.numeric(x)
    y <- ifelse(is.na(y), 0, y)
    return(y)}

combined_data_full <- combined_data_full %>% mutate_at(.vars = toNumerics, .funs = toNumericFunction)


# NOTE: To check whether we should do this with hrs spent for children/spouse
# since families don't have values for children (even though total children > 0) and
# will depend on number of children
# 17184804, 17162501

# 2A) EXPENDITURES
# DV_MLE: find total monthly lighting expenditures
# DV_NLE: and three-month nonlighting expenditures

combined_data_full <- combined_data_full %>%
    mutate(DV_MLE = spend.kerosene.fuel + spend.generator +
           spend.lpg +  spend.solar + spend.battery +
           other.lighting.source.1.expend_expend +
           other.lighting.source.2.expend_expend +
           other.lighting.source.3.expend_expend,
       DV_NLE = expend.total.three.months - expend.lighting)

# 2B) ARTIFICIAL LIGHTING
# DV_TH: total hours
# DV_NSH: total non-solar hours

combined_data_full = combined_data_full %>%
    mutate(DV_TH = hrs.day.from.kerosene + hrs.day.from.generator +
           hrs.day.from.lpg + hrs.day.from.battery +  hrs.day.from.solar +
           other.lighting.source.1.hrs_hrs + other.lighting.source.2.hrs_hrs +
           other.lighting.source.3.hrs_hrs,
       DV_NSH = DV_TH -  hrs.day.from.solar - other.lighting.source.1.hrs_hrs -
           other.lighting.source.2.hrs_hrs - other.lighting.source.3.hrs_hrs)


# 2C) SATISFACTION
# DV_LS: lighting satisfaction
# DV_AS: activity satisfaction: RESCALED SO THAT 1 IS LOW, 5 IS HIGH; NOTE DIFFERENT SCALING FOR LIGHTING
# DV_AS2: activity satisfaction weighted by time spent; excluded children from total when was NA

combined_data_full <- combined_data_full %>%
    mutate(no_child_info =
           case_when(is.na(combined_data_full %>%
                           dplyr::select(contains("children"),
                                         -contains("lantern"),
                                         -contains("solar")) %>%
                           rowSums()) == TRUE ~ 1,
                     TRUE ~ 0))

combined_data_full = combined_data_full %>%
    mutate_at(vars(adequate.reading, adequate.working, adequate.cooking, adequate.studying),
              funs(6 - .)) %>%
mutate(DV_LS = (cost.sat + safety.sat + quality.sat + reliability.sat)/4,
       DV_AS = (adequate.reading + adequate.working +
                adequate.cooking + adequate.studying)/4,
       DV_AS2_reading_num = case_when(no_child_info == 1 ~
                                      hrs.read.home + hrs.read.home.spouse,
                                  TRUE ~ hrs.read.home + hrs.read.home.spouse +
                                      hrs.read.home.children),
       DV_AS2_working_num = case_when(no_child_info == 1 ~
                                      hrs.work.home + hrs.work.home.spouse,
                                  TRUE ~ hrs.work.home + hrs.work.home.spouse +
                                      hrs.work.home.children),
       DV_AS2_cooking_num = case_when(no_child_info == 1 ~
                                      hrs.cook.home + hrs.cook.home.spouse,
                                  TRUE ~ hrs.cook.home + hrs.cook.home.spouse +
                                      hrs.cook.home.children),
       DV_AS2_denom = DV_AS2_reading_num + DV_AS2_working_num + DV_AS2_cooking_num,
       DV_AS2 = adequate.reading * (DV_AS2_reading_num/DV_AS2_denom) +
           adequate.working * (DV_AS2_working_num/DV_AS2_denom) +
           adequate.cooking * (DV_AS2_cooking_num/DV_AS2_denom))

# 2D) PERCEPTION
# note that only respondents who had heard of solar lanters held perceptions
# NOTE: suggestion is to exclude respondents who had not heard of it from pre/post

# DV_Perception: mean perception IF had heard of solar

combined_data_full <- combined_data_full %>%
    mutate(DV_Perc = case_when(solar.heard == 1 ~
                               (kerosene.v.solar.quality +
                                kerosene.v.solar.reliability +
                                kerosene.v.solar.ease +
                                kerosene.v.solar.cost +
                                kerosene.v.solar.productivity)/5)) %>%
mutate_at(vars(starts_with("kerosene.v.")),
          funs(case_when(solar.heard == 1 ~ .)))

# 2D) TIME USE
# NOTE: One item we'd discussed after posting the PAP was that we didn't have
# "studying" data for adults. we could use studying data for children
# but seemed to be "NA" in three cases, even though there was always a value
# greater than zero for "children live here"

# FOR NOW:
# IF NO CHILD INFO:
# DV_AT: Calculate average of home and spouse for reading/cooking/working and then average

# ELSE:
# DV_AT: Calculate average of all three groups reading/cooking/working/studying and then average
# DV_AT_normalized: I've normalized studying time by time spent per children

combined_data_full = combined_data_full %>%
    mutate(AverageReading =
           case_when(no_child_info == 0 ~ DV_AS2_reading_num/3,
                     no_child_info == 1 ~ DV_AS2_reading_num/2),
           AverageWorking = case_when(no_child_info == 0 ~ DV_AS2_working_num/3,
                                      no_child_info == 1 ~ DV_AS2_working_num/2),
           AverageCooking = case_when(no_child_info == 0 ~ DV_AS2_cooking_num/3,
                                      no_child_info == 1 ~ DV_AS2_cooking_num/2),
           DV_AT = case_when(no_child_info == 0 ~ (AverageReading +
                                                   AverageWorking +
                                                   AverageCooking +
                                                   hrs.study.home.children)/4,
                             no_child_info == 1 ~ (AverageReading +
                                                   AverageWorking +
                                                   AverageCooking)/3),
           DV_AT_normalized =
               case_when(no_child_info == 0 ~ (AverageReading +
                                               AverageWorking +
                                               AverageCooking +
                                               (hrs.study.home.children/children.live.here))/4,
                         no_child_info == 1 ~ (AverageReading +
                                               AverageWorking +
                                               AverageCooking)/3),
           DV_MT =
               case_when(no_child_info == 0 ~ (hrs.mobile.home +
                                               hrs.mobile.home.spouse +
                                               hrs.mobile.home.children)/3,
                         no_child_info == 1 ~ (hrs.mobile.home + hrs.mobile.home.spouse)/2))

combined_data_full_dv <- combined_data_full
combined_data_included_dv <- combined_data_full_dv %>% filter(endline_no_treatment == 0)

##################################################################################
# Balance and Descriptive Statistics: Figures C8-C13
##################################################################################
# The "belong.treatment" variable is NA for all baseline observations since they hadn't
# yet been assigned to treatment when information was collected. Add variable
# "ever.treated" which is 1 for baseline/endline data for all households treated in endline
# and zero for those that were never treated


combined_data_balance = combined_data_included_dv %>% as.tibble() %>%
    gather(variable, value, -Survey, -hhid) %>%
    unite(temp, Survey, variable) %>% spread(temp, value) %>%
    mutate(Baseline_ever.treated = Endline_belong.treatment,
           Endline_ever.treated = Endline_belong.treatment) %>%
    gather(Variable, Value, -hhid) %>%
    separate(Variable, into = c("Survey", "Variable"), sep = "_", extra = "merge", convert = TRUE) %>%
    spread(Variable, Value) %>% type_convert(trim_ws = T) %>% ungroup() %>% as.tibble()


balance = combined_data_balance %>%
    filter(Survey == "Baseline") %>%
    dplyr::select_if(function(x){!all(is.na(x)|x==0|x==1) & is.numeric(x)})  %>%  # exclude all NA, 0, or 1
    dplyr::select(-ends_with("own"), -ends_with("house"), #exclude numerator/denominator in calculations, number X owned
                  -starts_with("own."), -contains("calculate"),
                  -contains("denom"), -ends_with("_num"),
                  -pucca,  -hhid, -district, -occupation) %>%
    cbind(combined_data_balance %>% filter(Survey == "Baseline") %>%
          dplyr::select(ever.treated)) #add "ever.treated" variable

    # remove columns where all values are NA

    balance %>% mutate_all(funs(is.na(.))) %>% summarize_all(funs(sum(as.numeric(.))))

    # Before running regressions, rescale some variables so more easily interpretable
    balance = balance %>%
        mutate_at(vars(crime.non.violent.opinion, crime.violent.opinion, safety.concern), funs(6 - .)) %>%
        mutate(religion = ifelse(religion == 2, 0, religion)) %>% rename(hindu = religion) %>%
        mutate_at(vars(adequate.reading, adequate.working, adequate.cooking, adequate.studying), funs(6 - .)) %>% as_tibble()

    # Standardize
    balance.st = balance %>% mutate_at(vars(-village.code, -ever.treated, -village.code),
                                       funs(. - mean(., na.rm = TRUE))) %>%
    mutate_at(vars(-village.code, -ever.treated, -village.code), funs(./sd(., na.rm = TRUE)))


# Assign variables to relevant groups
iter_names = names(balance)[!(names(balance) %in% c("ever.treated", "village.code", "hamlet.name"))]
# no name for these geographic factors

temp.val = character()
temp.val.st = character()

bt.estimate = rep(NA, length(iter_names))
bt.estimate.st = rep(NA, length(iter_names))

bt.se = rep(NA, length(iter_names))
bt.se.st = rep(NA, length(iter_names))



# Run regressions


for(ii in 1:length(iter_names)){
    temp.val = paste0("lm(", iter_names[ii], " ~ ever.treated + factor(village.code), balance)")
    temp.val.st = paste0("lm(", iter_names[ii], " ~ ever.treated + factor(village.code), balance.st)")

    tempreg = eval(parse(text = temp.val))
    tempreg.st = eval(parse(text = temp.val.st))

    bt.estimate[ii] = coeftest(tempreg, cluster.vcov(tempreg, balance$village.code))[2, 1]

    bt.estimate.st[ii] = coeftest(tempreg.st, cluster.vcov(tempreg.st, balance.st$village.code))[2, 1]

    bt.se[ii] = coeftest(tempreg, cluster.vcov(tempreg, balance$village.code))[2, 2]
    bt.se.st[ii] = coeftest(tempreg.st, cluster.vcov(tempreg.st, balance.st$village.code))[2, 2]

    temp.val = character()
    temp.val.st = character()
    print(iter_names[ii])
}

balance.diag = data.frame(iter_names, bt.estimate, bt.se) %>%
    mutate(bt.low = bt.estimate - 1.96 * bt.se,
           bt.hi = bt.estimate + 1.96 * bt.se) %>%
dplyr::select(-bt.se)

balance.diag.st = data.frame(iter_names, bt.estimate.st, bt.se.st) %>%
    mutate(bt.low = bt.estimate.st - 1.96 * bt.se.st,
           bt.hi = bt.estimate.st + 1.96 * bt.se.st) %>%
dplyr::select(-bt.se.st)


# Assign variables to groups that will be in the same plot

balance.diag.2 = balance.diag %>%
    mutate(catval = case_when(str_detect(iter_names, "adequate.") == TRUE ~ "Adequate", str_detect(iter_names, "Average") == TRUE ~ "Averages", str_detect(iter_names, "safety.concern|violent") == TRUE ~ "Crime", str_detect(iter_names, "DV_") == TRUE ~ "DV", str_detect(iter_names, "wtp") == TRUE ~ "Primary Economic Variables", str_detect(iter_names, ".sat$") == TRUE ~ "Satisfaction", str_detect(iter_names, "kerosene.v.solar") == TRUE ~ "Kerosene v. Solar", str_detect(iter_names, "not.solar.") == TRUE ~ "Not solar", str_detect(iter_names, "^hrs\\..*home$") == TRUE ~ "Time spent (respondent)", str_detect(iter_names, "^hrs\\..*children$") == TRUE ~ "Time spent (children)", str_detect(iter_names, "^hrs\\..*spouse$") == TRUE ~ "Time spent (spouse)", (iter_names %in% c("expend.consumer", "expend.doctor", "expend.estate", "expend.food", "expend.housing.durable", "expend.marriage", "expend.other", "expend.debt", "hh.purchase.decisionmaker", "land.owned", "gov.sub.solar.kerosene", "ration.card.type")) ~ "Secondary Economic Variables", (iter_names %in% c("expend.total.three.months", "expend.lighting", "borrowed", "amt.in.debt", "econ.activity", "spend.kerosene.fuel", "hrs.day.from.kerosene")) ~ "Primary Economic Variables", (iter_names %in% c("age", "caste", "children.live.here", "total.children", "num.adults", "schooling", "hindu")) ~ "Demographics", TRUE ~ as.character(iter_names))) %>%
    filter(!(str_detect(iter_names, "other..*"))) %>%
    mutate(iter_names = case_when(catval == "DV"~ str_to_upper(iter_names), catval != "DV" ~ str_to_title(iter_names))) %>%
    arrange(catval, iter_names)

balance.diag.2.st = balance.diag.st %>%
    mutate(catval = case_when(str_detect(iter_names, "adequate.") == TRUE ~ "Adequate", str_detect(iter_names, "Average") == TRUE ~ "Averages", str_detect(iter_names, "safety.concern|violent") == TRUE ~ "Crime", str_detect(iter_names, "DV_") == TRUE ~ "DV", str_detect(iter_names, "wtp") == TRUE ~ "Primary Economic Variables", str_detect(iter_names, ".sat$") == TRUE ~ "Satisfaction", str_detect(iter_names, "kerosene.v.solar") == TRUE ~ "Kerosene v. Solar", str_detect(iter_names, "not.solar.") == TRUE ~ "Not solar", str_detect(iter_names, "^hrs\\..*home$") == TRUE ~ "Time spent (respondent)", str_detect(iter_names, "^hrs\\..*children$") == TRUE ~ "Time spent (children)", str_detect(iter_names, "^hrs\\..*spouse$") == TRUE ~ "Time spent (spouse)", (iter_names %in% c("expend.consumer", "expend.doctor", "expend.estate", "expend.food", "expend.housing.durable", "expend.marriage", "expend.other", "expend.debt", "hh.purchase.decisionmaker", "land.owned", "gov.sub.solar.kerosene", "ration.card.type")) ~ "Secondary Economic Variables", (iter_names %in% c("expend.total.three.months", "expend.lighting", "borrowed", "amt.in.debt", "econ.activity", "spend.kerosene.fuel", "hrs.day.from.kerosene")) ~ "Primary Economic Variables", (iter_names %in% c("age", "caste", "children.live.here", "total.children", "num.adults", "schooling", "hindu")) ~ "Demographics", TRUE ~ as.character(iter_names))) %>%
    filter(!(str_detect(iter_names, "other..*"))) %>%
    mutate(iter_names = case_when(catval == "DV"~ str_to_upper(iter_names), catval != "DV" ~ str_to_title(iter_names))) %>%
    arrange(catval, iter_names)

Balance.Sat = balance.diag.2 %>%
    filter(catval %in% c("Adequate", "Crime", "Satisfaction", "Kerosene v. Solar")) %>%
    mutate(iter_names = c("Cooking", "Reading", "Studying", "Working", "Non-violent crime rate is high", "Violent crime rate is high", "Concern about family's safety", "Lighting will reduce non-violent crime", "Lighting will reduce violent crime", "More affordable", "Easier to use", "Greater productivity", "Higher quality", "More reliable", "Satisfaction with cost", "Overall satisfaction", "Satisfaction with quality", "Satisfaction with reliability", "Satisfaction with safety"))

Balance.Sat.st = balance.diag.2.st %>%
    filter(catval %in% c("Adequate", "Crime", "Satisfaction", "Kerosene v. Solar")) %>%
    mutate(iter_names = c("Cooking", "Reading", "Studying", "Working", "Non-violent crime rate is high", "Violent crime rate is high", "Concern about family's safety", "Lighting will reduce non-violent crime", "Lighting will reduce violent crime", "More affordable", "Easier to use", "Greater productivity", "Higher quality", "More reliable", "Satisfaction with cost", "Overall satisfaction", "Satisfaction with quality", "Satisfaction with reliability", "Satisfaction with safety"))

Balance.Hours = balance.diag.2 %>%
    filter(catval %in% c("Time spent (spouse)", "Time spent (children)", "Time spent (respondent)")) %>%
    mutate(iter_names = c("Cooking", "Charging mobile", "Reading", "Studying", "Working", "Cooking", "Charging mobile", "Reading", "Working", "Cooking", "Reading", "Working"))

Balance.Hours.st = balance.diag.2.st %>%
    filter(catval %in% c("Time spent (spouse)", "Time spent (children)", "Time spent (respondent)")) %>%
    mutate(iter_names = c("Cooking", "Charging mobile", "Reading", "Studying", "Working", "Cooking", "Charging mobile", "Reading", "Working", "Cooking", "Reading", "Working"))


Balance.Econ = balance.diag.2 %>%
    filter(catval %in% c("Demographics", "Primary Economic Variables")) %>%
    filter(iter_names != "Econ.activity") %>%
    mutate(iter_names = c("Age", "Caste", "Number of children\nresiding", "Number of adults\nresiding", "Hindu", "Education", "Total number of\nchildren", "Total debt", "Total borrowed", "Lighting expenditures", "Total expenditures", "WTP (grid)", "Lighting from\nkerosene", "WTP (solar installation)", "WTP (solar panels)", "Kerosene fuel\nexpenditures"))

Balance.Econ.st = balance.diag.2.st %>%
    filter(catval %in% c("Demographics", "Primary Economic Variables")) %>%
    filter(iter_names != "Econ.activity") %>%
    mutate(iter_names = c("Age", "Caste", "Number of children\nresiding", "Number of adults\nresiding", "Hindu", "Education", "Total number of\nchildren", "Total debt", "Total borrowed", "Lighting expenditures", "Total expenditures", "WTP (grid)", "Lighting from\nkerosene", "WTP (solar installation)", "WTP (solar panels)", "Kerosene fuel\nexpenditures"))

Balance.DVs = balance.diag.2 %>% filter(catval %in% c("DV")) %>%
    mutate(iter_names = c("Satisfaction\nfrom leisure activities", "Satisfaction\nfrom leisure activities (wtd.)", "Time spent on\nleisure activities", "Time spent on\nleisure activities (normalized)", "Satisfaction\nwith lighting", "Monthly lighting\nexpenditures", "Time spent on\ncharging mobile devices", "Non-lighting\nexpenditures", "Hours of\nlighting (non-solar)", "Perception\nof solar", "Total hours\nof lighting"))

Balance.DVs.st = balance.diag.2.st %>% filter(catval %in% c("DV")) %>%
    mutate(iter_names = c("Satisfaction\nfrom leisure activities", "Satisfaction\nfrom leisure activities (wtd.)", "Time spent on\nleisure activities", "Time spent on\nleisure activities (normalized)", "Satisfaction\nwith lighting", "Monthly lighting\nexpenditures", "Time spent on\ncharging mobile devices", "Non-lighting\nexpenditures", "Hours of\nlighting (non-solar)", "Perception\nof solar", "Total hours\nof lighting"))

Balance.Secondary = balance.diag.2 %>% filter(catval %in% c("Secondary Economic Variables", "Not solar"))

Balance.Secondary.st = balance.diag.2.st %>% filter(catval %in% c("Secondary Economic Variables", "Not solar"))

# Function to plot
balance.plot.f = function(bal.df){
    bal.df = bal.df %>% arrange(catval, iter_names)
    bal.df.plot = bal.df %>%
        mutate(iter_names = factor(iter_names, levels = rev(bal.df$iter_names))) %>%
        ggplot(aes(x = iter_names, y = bt.estimate,  ymin=bt.low, ymax=bt.hi)) + facet_wrap(~catval, scales = "free", nrow = 2) + geom_pointrange() + geom_hline(yintercept = 0, lty=2) +  coord_flip() +  xlab("Coefficient for Treated") + ylab("Mean and Confidence Interval") + theme_tufte() + scale_colour_stata(name = "Description") + theme(axis.text.y = element_text(hjust= 0))
    return(bal.df.plot)}

Balance.Sat.st = Balance.Sat.st %>% rename(bt.estimate = bt.estimate.st)
Balance.Econ.st = Balance.Econ.st %>% rename(bt.estimate = bt.estimate.st)
Balance.DVs.st = Balance.DVs.st %>% rename(bt.estimate = bt.estimate.st)
Balance.Secondary.st = Balance.Secondary.st %>% rename(bt.estimate = bt.estimate.st)


# Plot
Balance.Sat = Balance.Sat %>%
    mutate(catval = ifelse(catval == "Kerosene v. Solar", "Solar compared to kerosene", ifelse(catval == "Adequate", "Adequacy of lighting for ...", catval)))

Balance.Sat.st = Balance.Sat.st %>%
    mutate(catval = ifelse(catval == "Kerosene v. Solar", "Solar compared to kerosene", ifelse(catval == "Adequate", "Adequacy of lighting for ...", catval)))


Figure_C8 <- balance.plot.f(Balance.DVs.st)
Figure_C9 <- balance.plot.f(Balance.Econ.st)
Figure_C10 <- balance.plot.f(Balance.Sat.st)


# Distribution of lighting used in baseline
baseline_use <- combined_data_included_dv %>% filter(Survey == "Baseline") %>% dplyr::select(hhid, starts_with("hrs.day."), starts_with("spend.")) %>% as.tibble()

# Two household use other sources, but HH 17162501 has not specified what the other type is so remove it
combined_data_included_dv %>% filter(hhid == 17162501 & Survey == "Baseline") %>% dplyr::select(contains("other"))

baseline_use_other <- combined_data_included_dv %>% filter(Survey == "Baseline") %>%
    dplyr::select(hhid, starts_with("hrs.day."), starts_with("spend."), starts_with("other.lighting.source")) %>%
    gather(key = "Other_Type", value = "Hours", -c(hhid:spend.solar)) %>% filter(!(is.na(Hours)|Hours==0)) %>%
    filter(hhid != 17162501) %>% dplyr::select(hhid, Other_Type, Hours) %>% mutate(hrs.day.from.candle = 2, spend.candle = 20) %>%
    dplyr::select(-Other_Type) %>% as.tibble()

baseline_use <- baseline_use %>%
    mutate(hrs.day.from.candle = ifelse(hhid == 17179904, 2, 0), spend.candle = ifelse(hhid == 17179904, 20, 0))


baseline_use_2 <- baseline_use %>% gather(key = "variable", value = "value", -hhid) %>%
    mutate(variable_type = ifelse(str_detect(variable, "hrs.day"), "Hours", "Expenditures")) %>%
    mutate(source_type = case_when(str_detect(variable, "generator") ~ "Generator",
                                   str_detect(variable, "kerosene") ~ "Kerosene lamp",
                                   str_detect(variable, "candle") ~ "Candle",
                                   str_detect(variable, "battery") ~ "Battery-charged lamp",
                                   str_detect(variable, "lpg") ~ "LPG lamp",
                                   str_detect(variable, "solar") ~ "Solar-charged lamp")) %>%
    dplyr::select(-variable) %>% spread(variable_type, value)



baseline_use_3 <- baseline_use_2  %>%
    mutate(Use = ifelse(Hours > 0, 1, 0),
           Cost_per_Hour = ifelse(Hours > 0, (Expenditures/30)/Hours, NA)) %>%
filter(source_type == "Kerosene lamp") %>%
dplyr::select(-hhid, -Use, -source_type) %>%
gather()

Figure_C11 <- baseline_use_3 %>%
    filter(key == "Expenditures" & value < 1000) %>%
    ggplot(aes(x = value)) + geom_density(fill = "black", alpha = 0.3) +
    scale_y_continuous(breaks = seq(from = 0, to = 0.01, by = 0.002), expand = c(0, 0)) +
    xlab("Kerosene expenditures per month (INR)") + ylab("Proportion of households") +
    theme_tufte()

Figure_C12 <- baseline_use_3 %>%
    filter(key == "Cost_per_Hour" & value < 5) %>%
    ggplot(aes(x = value)) + geom_density(fill = "black", alpha = 0.3) +
    scale_x_continuous(breaks = seq(from = 0, to = 3, by = 0.5), expand = c(0, 0)) +
    scale_y_continuous(breaks = seq(from = 0, to = 1, by = 0.2), expand = c(0, 0)) +
    xlab("Average kerosene expenditures per hour (INR)") + ylab("Proportion of households") +
    theme_tufte()


Figure_C13 <- baseline_use_3 %>%
    filter(key == "Hours") %>%
    ggplot(aes(x = value)) + geom_density(fill = "black", alpha = 0.3) +
    scale_y_continuous(breaks = seq(from = 0, to = .3, by = 0.1), expand = c(0, 0)) +
    xlab("Lighting from kerosene (hours per day)") + ylab("Proportion of households") +
    theme_tufte()

##################################################################################
# Summary Table of Dependent Variables: Table C5
##################################################################################
combined_data_2 <- combined_data_included_dv %>% as_tibble() %>% group_by(hhid) %>%
    mutate(ever.treated = belong.treatment[1]) %>% ungroup() %>%
    mutate(treatmentType = case_when(is.na(belong.treatment) & ever.treated == 1 ~ "Baseline Treated", belong.treatment == 1 & ever.treated == 1 ~ "Endline Treated", is.na(belong.treatment) & ever.treated == 0 ~ "Baseline Non-treated", belong.treatment == 0 & ever.treated == 0 ~ "Endline Non-treated", TRUE ~ "NA")) %>%
    dplyr::select(treatmentType, starts_with("DV"), starts_with("Average"), adequate.reading, adequate.working, adequate.cooking, adequate.studying) %>%
    mutate_at(vars(starts_with("adequate")), ~6 - .) %>%
    gather(key, value, -treatmentType) %>%
    filter(!(is.na(value))) %>%
    group_by(treatmentType, key) %>%
    summarize(total = n(), avg = mean(value), se = sd(value)/sqrt(total)) %>%
    ungroup() %>%
    mutate_at(vars(total, avg, se), ~ as.numeric(.)) %>%
    filter(!str_detect(key, c("[num|denom|normalized|2]$")))


# Dataframe for ordering the variables

k1 <- c("adequate.cooking", "adequate.reading", "adequate.studying", "adequate.working", "AverageCooking", "AverageReading", "AverageWorking", "DV_AS", "DV_AT", "DV_LS", "DV_MLE", "DV_MT", "DV_NLE", "DV_NSH", "DV_Perc", "DV_TH")


k2 <- c("Adequacy of Lighting for Cooking", "Adequacy of Lighting for Reading", "Adequacy of Lighting for Studying", "Adequacy of Lighting for Working", "Average Time Cooking", "Average Time Reading", "Average Time Working", "Satisfaction from leisure activities", "Time spent on leisure activities (hrs)", "Lighting Satisfaction", "Monthly lighting expenditures", "Time spent on charging mobile devices", "Non-lighting $", "Hours of lighting (non-solar)", "Perception of solar", "Total hours of lighting")

ordering <- c(12, 13, 15, 14, 6, 7, 8, 11, 5, 10, 1, 9, 2, 4, 16, 3)

cdTemp <- tibble(x = k1, y = k2, ...z = ordering) %>% rename(key = x, key2 = y, ordering = ...z)

combined_data_3 <- combined_data_2  %>% left_join(cdTemp) %>% dplyr::select(-key)

# Table C5
# Choose location of file to print out
Table_C5 <- combined_data_3 %>% dplyr::select(Variable = key2, Mean = avg, `Std. Error` = se, `n` = total, treatmentType, ordering) %>% split(.$treatmentType) %>%
    map(function(x){
            treatmentType <- unique(x$treatmentType)
            t1 <- x %>% arrange(ordering) %>% dplyr::select(-treatmentType, -ordering)
            t2 <- t1 %>% xtable(display = c("d", "s", "f", "f", "d"))
            print(t2, include.rownames = FALSE,
                  append = TRUE, floating = FALSE)
           })

##################################################################################
# Main Analysis: Figure 4 and D14; Tables 3-4, D6, F7-F10
##################################################################################
added_on = c("DV_TH, DV_NSH, DV_LS, DV_AS, DV_Perc, AverageReading, AverageWorking, AverageCooking, DV_AT, DV_MT")

# NOTE: the code below replicates Tables 3 and 4. For tables F8 and F9, please replace combined_data_included_dv below with combined_data_full_dv

# 1) create logVariables to standardize and rename
combined_data_analysis = combined_data_full_dv %>%
    mutate(belong.treatment = ifelse(Survey == "Baseline", 0, belong.treatment), DV_MLE = log(1 + DV_MLE), DV_NLE = log(1 + DV_NLE)) %>%
    rename(DV_AverageReading = AverageReading, DV_AverageWorking = AverageWorking, DV_AverageCooking = AverageCooking) %>%
    mutate_at(vars(DV_TH, DV_NSH, DV_LS, DV_AS, DV_Perc, DV_AverageReading, DV_AverageWorking, DV_AverageCooking, DV_AT, DV_MT), funs(logval = log(1 + .))) %>%
    rename(AverageReading = DV_AverageReading, AverageWorking = DV_AverageWorking, AverageCooking = DV_AverageCooking)

# 2) Arrange as panel data to incorporate fixed effects
fe.df = pdata.frame(combined_data_analysis, index = c("Survey", "hhid"))

# 3) Create vector of all dependent variable names in current file
dep_vars = names(combined_data_analysis)[str_detect(names(combined_data_analysis), "(?!.*num(?:|$))(?!.*denom(?:|$))^DV")] %>% as.list()

# 4) Associated names for regression table
dep_vars_names = c("Lighting \\$", "Non-lighting \\$", "Lighting Hrs.", "Non-Solar Hrs", "Lighting Sat.", "Activity Sat.", "Wtd. Activity Sat", "Perception", "Activity Time", "Activity Time Norm.", "Mobile Time", "Lighting Hrs. (log)", "Non-solar hours. (log)", "Lighting satisfaction (log)", "Activity Satisfaction (log)", "Perception (log)", "Average Reading (log)", "Average Working (log)", "Average Cooking (log)", "Activity Time (log)", "Mobile Time (log)")

# Fixed Effects (no clustered ses) -- loop over each of dep_vars as DV
f.ols.fixed <- function(depvar){
    temp = paste0("plm(", depvar, " ~ belong.treatment, data = fe.df, model = 'within', effect = 'twoways')")
    temp.reg = eval(parse(text = temp))
    temp.summ = summary(temp.reg)
    return(temp.reg)}

ols_list_fixed = lapply(dep_vars, f.ols.fixed)

# White clustered SEs
hetero_consistent_se_white = lapply(ols_list_fixed, function(x) sqrt(diag(vcovHC(x, method = c("white1"), cluster = c("group")))))


# Now add clustered standard errors and BH multiple comparisons test

# First update standard errors and manually change F statistic since stargazer doesn't do that
f.stat.function = function(regval, methodval){
    pval = ((pwaldtest(regval, test = "F", vcov = vcovHC(regval, method = c(methodval), cluster = c("group"))))$p.value[1])
    fval = ((pwaldtest(regval, test = "F", vcov = vcovHC(regval, method = c(methodval), cluster = c("group"))))$statistic[1])
    p.add = ifelse(pval < 0.1, "*", ifelse(pval < 0.05, "**", ifelse(pval < 0.01, "***", "")))
    return(paste0(as.character(formatC(fval, format = "e", digits = 2)), p.add))}

# Then take p values and adjust them (BH multiple comparisons)
pvals = as.numeric(lapply(ols_list_fixed, function(x){summary(x)[[1]][4]}))
pvals.adjusted = p.adjust(pvals, method = c("BH"))

percentage_changes.df <- data.frame(variable_names = sapply(ols_list_fixed, function(x){str_extract(as.character(x$call[2]), regex("^([\\w]+)"))}), coefs = sapply(ols_list_fixed, function(x){x$coefficients})) %>%
    mutate(perc_decrease = round(100*(1 - exp(coefs)), 1)) %>%
    mutate(perc_decrease = paste0(perc_decrease, "%")) %>%
    filter(variable_names %in% c("DV_MLE", "DV_NLE")|str_detect(variable_names, "logval")) %>%
    mutate(variable_names = str_remove(variable_names, "^DV_"), variable_names = str_remove(variable_names, "_logval$")) %>%
    dplyr::select(-coefs)

percentage_changes.df$variable_names = c("Monthly lighting expenditures", "Non-lighting expenditures", "Total hours", "Non-solar hours", "Lighting satisfaction", "Activity satisfaction", "Perceptions toward solar", "Average time reading", "Average time working", "Average time cooking", "Activity Time", "Mobile Time")

ols_list_fixed = ols_list_fixed[c(1:11)]
hetero_consistent_se_white = hetero_consistent_se_white[c(1:11)]

dep_vars = dep_vars[-c(12:21)]
dep_vars_names = dep_vars_names[-c(12:21)]
pvals = pvals[-c(12:21)]
pvals.adjusted = pvals.adjusted[-c(12:21)]

# 1.2) Break up so fits on page
no_robustness_ols = ols_list_fixed[-c(7, 10)]
no_robustness_pvals = pvals[-c(7, 10)]
no_robustness_names = dep_vars_names[-c(7, 10)]
no_robustness_ols_1 = no_robustness_ols[c(1:4)]
no_robustness_ols_2 = no_robustness_ols[c(5:9)]
no_robustness_pvals_1 = no_robustness_pvals[c(1:4)]
no_robustness_pvals_2 = no_robustness_pvals[c(5:9)]
no_robustness_names_1 = no_robustness_names[c(1:4)]
no_robustness_names_2 = no_robustness_names[c(5:9)]

Table_3a <- stargazer(no_robustness_ols_1,
                      omit = c("factor\\(hhid\\)"),
                      se = hetero_consistent_se_white[1:4],
                      covariate.labels = c("Treatment", "Endline"),
                      dep.var.labels = no_robustness_names_1,
                      font.size = c("footnotesize"),
                      omit.table.layout = "n",
                      dep.var.caption = c("\\textbf{Effects on household expenditures and hours of lighting}"),
                      model.numbers = FALSE,  omit.stat = "f",
                      p.auto = FALSE,
                      float = FALSE)

Table_3b <- stargazer(no_robustness_ols_2,
                      omit = c("factor\\(hhid\\)"),
                      se = hetero_consistent_se_white[5:9],
                      covariate.labels = c("Treatment", "Endline"),
                      dep.var.labels = no_robustness_names_2,
                      font.size = c("footnotesize"),
                      omit.table.layout = "n",
                      dep.var.caption = c("\\textbf{Effects on satisfaction, perception, and time}"),
                      model.numbers = FALSE,  omit.stat = "f",
                      p = list(no_robustness_pvals_2),
                      p.auto = FALSE,
                      float = FALSE)

# Robustness
ols_list_rob = ols_list_fixed[c(9, 10, 6, 7)]
hetero_consistent_se_white_rob = hetero_consistent_se_white[c(9, 10, 6, 7)]
dep_vars_names_rob = dep_vars_names[c(9, 10, 6, 7)]
pvals_rob = pvals[c(9, 10, 6, 7)]

Table_F7 <- stargazer(ols_list_rob, omit = c("factor\\(hhid\\)"),
                      se = hetero_consistent_se_white_rob,
                      covariate.labels = c("Treatment", "Endline"),
                      dep.var.labels = dep_vars_names_rob,
                      font.size = c("footnotesize"),
                      model.numbers = FALSE,
                      omit.stat = "f",
                      add.lines = list(c("F Statistic (df = 1, 1)",
                                         unlist(lapply(ols_list_rob, f.stat.function, c("white1"))))),
                      p = list(pvals_rob),
                      p.auto = FALSE,
                      float = FALSE)

# 1.3) Segment by leisure activity
dep_vars_act = names(combined_data_analysis)[c(171:174)] %>% as.list()
dep_vars_names_act = c("Reading", "Working", "Cooking", "Studying")
ols_list_fixed_act = lapply(dep_vars_act, f.ols.fixed)

# White clustered SEs
hetero_consistent_se_white_act = lapply(ols_list_fixed_act, function(x) sqrt(diag(vcovHC(x, method = c("white1"), cluster = c("group")))))

# First update standard errors and manually change F statistic since stargazer doesn't do that
# Then take p values and adjust them (BH multiple comparisons)
pvals_act = as.numeric(lapply(ols_list_fixed_act, function(x){summary(x)[[1]][4]}))
pvals.adjusted_act = p.adjust(pvals_act, method = c("BH"))

# 1.4) Segment by satisfaction
dep_vars_al = names(combined_data_analysis)[c(112:115)] %>% as.list()
dep_vars_names_al = c("Cost", "Safety", "Quality", "Reliability")
ols_list_fixed_al = lapply(dep_vars_al, f.ols.fixed)
hetero_consistent_se_white_al = lapply(ols_list_fixed_al, function(x) sqrt(diag(vcovHC(x, method = c("white1"), cluster = c("group")))))

pvals_al = as.numeric(lapply(ols_list_fixed_al, function(x){summary(x)[[1]][4]}))
pvals.adjusted_al = p.adjust(pvals_al, method = c("BH"))

Table_4a <- stargazer(ols_list_fixed_act, omit = c("factor\\(hhid\\)"),
                      se = hetero_consistent_se_white_act,
                      covariate.labels = c("Treatment", "Endline"),
                      dep.var.labels = dep_vars_names_act,
                      font.size = c("footnotesize"),
                      model.numbers = FALSE,
                      omit.table.layout = "n",
                      dep.var.caption = c("\\textbf{Effects on satisfaction with lighting for leisure activities}"),
                      omit.stat = "f",
                      p = list(pvals_act),
                      p.auto = FALSE,
                      float = FALSE)

Table_4b <- stargazer(ols_list_fixed_al, omit = c("factor\\(hhid\\)"),
                      se = hetero_consistent_se_white_al,
                      covariate.labels = c("Treatment", "Endline"),
                      dep.var.labels = dep_vars_names_al,
                      font.size = c("footnotesize"),
                      model.numbers = FALSE,
                      omit.table.layout = "n",
                      dep.var.caption = c("\\textbf{Effects on satisfaction with lighting across various dimensions}"),
                      omit.stat = "f",
                      p = list(pvals_al),
                      p.auto = FALSE,
                      float = FALSE)

# 1.3) Segment time by activity
dep_vars_act_time = names(combined_data_analysis)[c(211:213, 170)] %>% as.list()
dep_vars_names_act_time = c("Reading", "Working", "Cooking", "Studying")
ols_list_fixed_act_time = lapply(dep_vars_act_time, f.ols.fixed)

# White clustered SEs
hetero_consistent_se_white_act_time = lapply(ols_list_fixed_act_time, function(x) sqrt(diag(vcovHC(x, method = c("white1"), cluster = c("group")))))

# First update standard errors and manually change F statistic since stargazer doesn't do that
# Then take p values and adjust them (BH multiple comparisons)
pvals_act_time = as.numeric(lapply(ols_list_fixed_act_time, function(x){summary(x)[[1]][4]}))
pvals.adjusted_act_time = p.adjust(pvals_act_time, method = c("BH"))

Table_F10 <- stargazer(ols_list_fixed_act_time, omit = c("factor\\(hhid\\)"),
                       se = hetero_consistent_se_white_act_time,
                       covariate.labels = c("Treatment"),
                       dep.var.labels = dep_vars_names_act_time,
                       font.size = c("footnotesize"),
                       model.numbers = FALSE,
                       omit.table.layout = "n",
                       dep.var.caption = c("\\textbf{Effects on time spent on leisure activities, segmented by activity}"),
                       omit.stat = "f",
                       p = list(pvals_act_time),
                       p.auto = FALSE,
                       float = FALSE)


combined_data_analysis.st = combined_data_analysis %>% dplyr::select(Survey, hhid, belong.treatment, unlist(dep_vars))

stFunction = function(st){
    if(any(is.character(st)|is.factor(st))){return(st)}
    else{
        temp.mean = mean(st, na.rm = T)
        temp.sd = sqrt(var(st, na.rm = T))
        st.mz = st - rep(temp.mean, length(st))
        st.mz.stan = st.mz/temp.sd
        return(st.mz/temp.sd)}}

combined_data_analysis.st = combined_data_analysis.st %>%
    mutate_at(vars(-Survey, -hhid, -belong.treatment), funs(. - mean(., na.rm = TRUE))) %>%
    mutate_at(vars(-Survey, -hhid, -belong.treatment), funs(./sd(., na.rm = TRUE)))

fe.df.st = pdata.frame(combined_data_analysis.st, index = c("Survey", "hhid"))

f.ols.fixed.st <- function(depvar){
    temp = paste0("plm(", depvar, " ~ belong.treatment, data = fe.df.st, model = 'within', effect = 'twoways')")
    temp.reg = eval(parse(text = temp))
    temp.summ = summary(temp.reg)
    return(temp.reg)}

ols_list_fixed_st = lapply(dep_vars, f.ols.fixed.st)

# White clustered SEs
hetero_consistent_se_white_st = lapply(ols_list_fixed_st, function(x) sqrt(diag(vcovHC(x, method = c("white1"), cluster = c("group")))))
pvals_st = as.numeric(lapply(ols_list_fixed_st, function(x){summary(x)[[1]][4]}))
pvals.adjusted_st = p.adjust(pvals_st, method = c("BH"))


no_robustness_ols_st = ols_list_fixed_st[-c(7, 10)]
no_robustness_pvals_st = pvals_st[-c(7, 10)]
no_robustness_names_st = dep_vars_names[-c(7, 10)]

no_robustness_ols_1_st = no_robustness_ols_st[c(1:4)]
no_robustness_ols_2_st = no_robustness_ols_st[c(5:9)]
no_robustness_pvals_1_st = no_robustness_pvals_st[c(1:4)]
no_robustness_pvals_2_st = no_robustness_pvals_st[c(5:9)]
no_robustness_names_1_st = no_robustness_names_st[c(1:4)]
no_robustness_names_2_st = no_robustness_names_st[c(5:9)]

# 2.1) Break up so fits on page

Table_D6a <- stargazer(no_robustness_ols_1_st,
                       omit = c("factor\\(hhid\\)"),
                       se = hetero_consistent_se_white_st[1:4],
                       covariate.labels = c("Treatment", "Endline"),
                       dep.var.labels = no_robustness_names_1_st,
                       font.size = c("footnotesize"),
                       model.numbers = FALSE,
                       omit.stat = "f",
                       add.lines = list(c("F Statistic (df = 1, 1)",
                                          unlist(lapply(no_robustness_ols_1_st,
                                                        f.stat.function, c("white1"))))),
                       p = list(no_robustness_pvals_1),
                       p.auto = FALSE,
                       float = FALSE,
                       title = "Effect of lanterns on household expenditures and hours of lighting (standardized)")


Table_D6b <- stargazer(no_robustness_ols_2_st,
                       omit = c("factor\\(hhid\\)"),
                       se = hetero_consistent_se_white_st[5:9],
                       covariate.labels = c("Treatment", "Endline"),
                       dep.var.labels = no_robustness_names_2_st,
                       font.size = c("footnotesize"),
                       model.numbers = FALSE,
                       omit.stat = "f",
                       add.lines = list(c("F Statistic (df = 1, 1)",
                                          unlist(lapply(no_robustness_ols_2_st,
                                                        f.stat.function, c("white1"))))),
                       p = list(no_robustness_pvals_2),
                       p.auto = FALSE,
                       float = FALSE,
                       title = "Effect of lanterns on satisfaction, perception of solar technology, and allocation of time (standardized)")

# Pull coefficient values and names
pull_coef = function(obj, val){
    name = as.character(summary(obj)$formula[2])
    coef.val = summary(obj)$coefficients[1]
    se = as.character(summary(obj)$formula[2])
    out = ifelse(val == "name", name, coef.val)
    return(out)
}
coef.plot.names = unlist(lapply(ols_list_fixed, pull_coef, "name"))
coef.plot.coefs = unlist(lapply(ols_list_fixed, pull_coef, "coef"))


# Standard errors are white
coef.plot.se = as.numeric(unlist(hetero_consistent_se_white))

# Create data frame for coefficient plot and check that names align with dep_var_names


coef.df = data.frame(Name = coef.plot.names,
                     Coefficient = coef.plot.coefs,
                     SE =coef.plot.se,
                     P_Value = pvals.adjusted) %>%
mutate(Name_2 = str_trim(str_remove(dep_vars_names, "\\\\\\$")))

# Order variables and rename them
coef.df2 =  coef.df %>%
    mutate(Name_2 = str_trim(str_remove(Name_2, "\\.$")),
           Name_2 = factor(Name_2, levels = c("Mobile Time",
                                              "Activity Time Norm",
                                              "Activity Time",
                                              "Perception",
                                              "Wtd. Activity Sat",
                                              "Activity Sat",
                                              "Lighting Sat",
                                              "Non-Solar Hrs",
                                              "Lighting Hrs",
                                              "Non-lighting",
                                              "Lighting"))) %>%
dplyr::select(-Name) %>% rename(Name = Name_2) %>%
mutate(Name = case_when(Name == "Mobile Time" ~ "Time charging\nmobile device",
                        Name == "Activity Time Norm" ~ "Time spent on activities\n (normalized)",
                        Name ==  "Activity Time" ~ "Time spent\non activites",
                        Name ==  "Perception" ~ "Perception\n of solar",
                        Name ==  "Wtd. Activity Sat" ~ "Satisfaction\n from activities \nweighted by time",
                        Name ==  "Activity Sat" ~ "Satisfaction\n from activities",
                        Name ==  "Lighting Sat" ~ "Satisfaction\n with lighting",
                        Name ==  "Non-Solar Hrs" ~ "Hours of lighting \n(non-solar sources)",
                        Name ==  "Lighting Hrs" ~ "Total hours\n of lighting",
                        Name ==  "Non-lighting" ~ "Non-lighting\nexpenditures",
                        TRUE ~ "Lighting\nexpenditures"))

# Add stars based on p values and find upper and lower bounds
coef.df3 = coef.df2 %>%
    mutate(Name = case_when((0.05 < P_Value) & (P_Value < 0.1) ~ paste0(Name, "*"),
                            (0.01 < P_Value) & (P_Value < 0.05) ~ paste0(Name, "**"),
                            (P_Value<0.01)  ~ paste0(Name, "***"),
                            TRUE ~ Name)) %>% as.data.frame() %>%
dplyr::select(-P_Value) %>%
mutate(LB = Coefficient - 1.96 * SE, UB = Coefficient + 1.96 * SE) %>%
dplyr::select(-SE) %>% dplyr::select(Name, LB, Coefficient, UB) %>% mutate(Col = ifelse(UB < 0 | LB > 0, "Sig", "Non-Sig"))


# Create coefficient plot

Figure_4 = coef.df3 %>%
    mutate(Name = factor(Name, levels = c(Name[c(length(Name):1)]))) %>%
    ggplot(aes(x = Name, y = Coefficient,
               ymin = LB, ymax = UB, colour = Col), size = 1) +
geom_pointrange(stroke = 0)  +
geom_hline(aes(yintercept=0), lty=2) +
geom_hline(aes(yintercept = -1), lty=2, size = 0.1) +
geom_hline(aes(yintercept = 1), lty = 2, size = 0.1) +
geom_hline(aes(yintercept = -2), lty = 2, size = 0.1) +
geom_hline(aes(yintercept = 2), lty = 2, size = 0.1) +
geom_hline(aes(yintercept = -3), lty = 2, size = 0.1) +
geom_hline(aes(yintercept = 3), lty = 2, size = 0.1) +
ylim(c(-4, 4)) +
coord_flip() + xlab('') + ylab('Coefficient') +
theme_tufte() + geom_tufteboxplot()  +
scale_colour_stata(guide = FALSE) +
theme(axis.text.y = element_text(size = 10, hjust= 0))


# Pull coefficient values and names
coef.plot.names_st = unlist(lapply(ols_list_fixed_st, pull_coef, "name"))
coef.plot.coefs_st = unlist(lapply(ols_list_fixed_st, pull_coef, "coef"))

# Standard errors are white
coef.plot.se_st = as.numeric(unlist(hetero_consistent_se_white_st))

# Create data frame for coefficient plot and check that names align with dep_var_names
coef.df_st = data.frame(Name = coef.plot.names_st,
                        Coefficient = coef.plot.coefs_st,
                        SE =coef.plot.se_st,
                        P_Value = pvals.adjusted_st) %>%
mutate(Name_2 = str_trim(str_remove(dep_vars_names, "\\\\\\$")))

# Order variables and rename them
coef.df2_st =  coef.df_st %>%
    mutate(Name_2 = str_trim(str_remove(Name_2, "\\.$")),
           Name_2 = factor(Name_2, levels = c("Mobile Time",
                                              "Activity Time Norm",
                                              "Activity Time",
                                              "Perception",
                                              "Wtd. Activity Sat",
                                              "Activity Sat",
                                              "Lighting Sat",
                                              "Non-Solar Hrs",
                                              "Lighting Hrs",
                                              "Non-lighting",
                                              "Lighting"))) %>%
dplyr::select(-Name) %>% rename(Name = Name_2) %>%
mutate(Name = case_when(Name == "Mobile Time" ~ "Time charging\nmobile device",
                        Name == "Activity Time Norm" ~ "Time spent on activities\n (normalized)",
                        Name ==  "Activity Time" ~ "Time spent\non activites",
                        Name ==  "Perception" ~ "Perception\n of solar",
                        Name ==  "Wtd. Activity Sat" ~ "Satisfaction\n from activities \nweighted by time",
                        Name ==  "Activity Sat" ~ "Satisfaction\n from activities",
                        Name ==  "Lighting Sat" ~ "Satisfaction\n with lighting",
                        Name ==  "Non-Solar Hrs" ~ "Hours of lighting \n(non-solar sources)",
                        Name ==  "Lighting Hrs" ~ "Total hours\n of lighting",
                        Name ==  "Non-lighting" ~ "Non-lighting\nexpenditures",
                        TRUE ~ "Lighting\nexpenditures"))

# Add stars based on p values and find upper and lower bounds
coef.df3_st = coef.df2_st %>%
    mutate(Name = case_when((0.05 < P_Value) & (P_Value < 0.1) ~ paste0(Name, "*"),
                            (0.01 < P_Value) & (P_Value < 0.05) ~ paste0(Name, "**"),
                            (P_Value<0.01)  ~ paste0(Name, "***"),
                            TRUE ~ Name)) %>% as.data.frame() %>%
dplyr::select(-P_Value) %>%
mutate(LB = Coefficient - 1.96 * SE, UB = Coefficient + 1.96 * SE) %>%
dplyr::select(-SE) %>% dplyr::select(Name, LB, Coefficient, UB) %>% mutate(Col = ifelse(UB < 0 | LB > 0, "Sig", "Non-Sig"))


# Create coefficient plot
Figure_D14 = coef.df3_st %>%
    mutate(Name = factor(Name, levels = c(Name[c(length(Name):1)]))) %>%
    ggplot(aes(x = Name, y = Coefficient, ymin = LB, ymax = UB, colour = Col), size = 1) +
    geom_pointrange(stroke = 0)  + ylim(c(-1.5, 1.5)) +
    scale_y_continuous(breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
    geom_hline(aes(yintercept=0), lty=2) +
    coord_flip() + xlab('') + ylab('Coefficient') +
    theme_tufte() + geom_tufteboxplot()   +
    scale_colour_stata(guide = FALSE) +
    theme(axis.text.y = element_text(size = 10, hjust= 0))

##################################################################################
# Spending: Figure 5
##################################################################################
combined_data_analysis = combined_data_included_dv %>%
    mutate(belong.treatment = ifelse(Survey == "Baseline", 0, belong.treatment),
           DV_MLE = log(1 + DV_MLE), DV_NLE = log(1 + DV_NLE))

combined_data_analysis_temp = combined_data_analysis %>%
    gather(key, value, -hhid, -Survey) %>%
    unite("comb", c(Survey, key)) %>%
    spread(comb, value) %>%
    mutate(ever.treated = Endline_belong.treatment) %>%
    gather(key, value, -hhid, -ever.treated) %>%
    separate(key, into = c("Survey", "key2"), sep = "_",
             remove = TRUE, extra = "merge") %>%
    spread(key2, value)


combined_data_analysis_temp = combined_data_analysis_temp %>%
    dplyr::select(hhid, ever.treated, Survey, starts_with("expend")) %>%
    mutate_at(vars(-Survey), funs(as.numeric(.))) %>%
    mutate(expend.total.three.months = expend.total.three.months - expend.lighting) %>%
    dplyr::select(-expend.lighting) %>%
    mutate_at(vars(expend.consumer:expend.other),
              funs(per = 100 * round((./expend.total.three.months), 2))) %>%
    dplyr::select(hhid, ever.treated, Survey, contains("_per")) %>%
    group_by(Survey, ever.treated) %>%
    summarize_at(vars(expend.consumer_per:expend.other_per), funs(mean(., na.rm = T))) %>%
    gather(key, value, - Survey, -ever.treated) %>%
    arrange(Survey, ever.treated, desc(value)) %>%
    ungroup() %>%
    mutate(key = str_remove(key, "_per")) %>%
    mutate(key = str_remove(key, "^expend\\.")) %>%
    mutate(key = str_to_title(key)) %>%
    mutate(ever.treated = case_when(ever.treated == 0 ~ "Non-treated",
                                    TRUE ~ "Treated")) %>%
    mutate(ever.treated = factor(ever.treated)) %>%
    mutate(key = ifelse(key == "Housing.durable", "Housing/durable", key))


Figure_5 = combined_data_analysis_temp %>%
    mutate(key = factor(key, levels = c("Food", "Doctor", "Consumer", "Debt", "Estate", "Housing/durable", "Marriage", "Other"))) %>%
    rename(Percentage = value) %>%
    ggplot(aes(x = key, y = Percentage, fill = Survey)) + geom_col(position = "dodge") + theme_tufte() + xlab("Spending on category") + scale_fill_grey(start = 0.5, end = 0.75) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + facet_wrap(~ ever.treated, scale = "free", nrow = 2)

##################################################################################
# Amortization: Figure E15, E16
##################################################################################
# Bensch et al 2018 compares hypothetical monthly SHS cost to
# substitutable energy expenses (i.e., costs that can be covered
# using SHS) among non-adopters; but we have monthly lighting
# expenditures for those who transitioned to SHS so we can simply
# rely on those


# dv_calc has not used logarithm transform
amortization_1 <- combined_data_included_dv %>% as_tibble() %>%
    dplyr::select(hhid, village.code, belong.treatment,
                  DV_MLE, DV_NLE, DV_TH, DV_NSH) %>%
mutate(survey = ifelse(is.na(belong.treatment), "Baseline", "Endline"),
       DV_SH = DV_TH - DV_NSH) %>%
dplyr::select(-DV_TH) %>%
group_by(hhid) %>%
mutate(belong.treatment = max(belong.treatment, na.rm = T)) %>%
gather(key = variable, value = value, -c(hhid, village.code, belong.treatment, survey)) %>%
unite(var2, survey, variable) %>%
spread(var2, value)

amortization_1 %>% filter(belong.treatment == 1) %>% mutate(zero_endline = (Endline_DV_MLE == 0)) %>% group_by(zero_endline) %>% summarize(tot = n()/97) %>% ungroup()

# Show relationship between change in monthly lighting expenses based on Lighting and Non-lighting expenditures

amort.df <- amortization_1 %>% filter(belong.treatment == 1) %>%
    mutate(Change_LE = Endline_DV_MLE - Baseline_DV_MLE,
           Baseline_DV_NLE = Baseline_DV_NLE/3) %>%
ungroup() %>%
dplyr::select(Change_MLE = Change_LE, Baseline_DV_MLE, Baseline_DV_NLE) %>%
arrange(desc(Baseline_DV_NLE)) %>%
gather(key = Expenditure_Type, value = Value, -Change_MLE) %>%
mutate(Expenditure_Type = ifelse(Expenditure_Type == "Baseline_DV_MLE",
                                 "Lighting", "Non-lighting")) %>%
filter(Change_MLE > min(Change_MLE))

Figure_E15 <- amort.df %>%  ggplot(aes(x = Value, y = Change_MLE)) + geom_point(size = .8) + facet_wrap(~ Expenditure_Type, scales = "free", strip.position = "bottom") + ylab("Treated household change\n in monthly lighting expenses (INR)") + xlab("Baseline monthly expenditures (INR)") + scale_x_continuous(trans = log1p_trans(), breaks = c(50, 100, 200,300, 500, 1000, 3000, 5000, 10000, 30000)) + theme_bw() + geom_smooth(method = "lm") + theme(axis.text.x = element_text(angle = 40, hjust = 1), axis.title.y = element_text(size = 9), axis.title.x = element_text(size = 9))

amort.df2 <-  amort.df %>% group_by(Expenditure_Type) %>% mutate(Quartile_Rank = ntile(Value, 4)) %>% group_by(Quartile_Rank, Expenditure_Type) %>% summarize(Quartile_SEE = mean(Change_MLE), Quartile_Average_Expenditures = mean(Value)) %>% arrange(Expenditure_Type, Quartile_Rank)

# Solar lantern cost approximately USD 20 (1400 INR)

currency <- read_csv("currency.csv", col_names = FALSE) %>%
    mutate(X1 = str_replace_all(X1, "-", " "), X1 = str_replace_all(X1, "18$", "2018"), X1 = str_replace_all(X1, "19$", "2019"), X1 = str_replace_all(X1, "Jan", "January"), X1 = str_replace_all(X1, "Feb", "February"), X1 = str_replace_all(X1, "Mar", "March"), X1 = str_replace_all(X1, "Apr", "April"), X1 = str_replace_all(X1, "Jun", "June"), X1 = str_replace_all(X1, "Jul", "July"), X1 = str_replace_all(X1, "Aug", "August"), X1 = str_replace_all(X1, "Sep", "September"), X1 = str_replace_all(X1, "Oct", "October"), X1 = str_replace_all(X1, "Nov", "November"), X1 = str_replace_all(X1, "Dec", "December"))

currency %>% mutate(day = str_extract(X1, pattern = regex("^\\d*\\s"))) %>% mutate(month = str_extract(X1, pattern = regex("(?<=\\s)(.*)\\s"))) %>% mutate(day = str_replace(day, " ", ""), day = as.numeric(day)) %>% mutate(month = str_replace(month, " ", "")) %>% mutate(year = str_extract(X1, pattern = "\\d{4}$")) %>% dplyr::select(month, day, year, conversion_rate = X2) %>% filter(month %in% c("February", "March", "April", "May", "June", "July", "August")) %>% filter(!(month == "August" && day > 1) & (year == 2018)) %>% summarize(mean(conversion_rate))


amortFun <- function(amount, apr, years){
    monthly_rate <- apr/12
    periods <- 12 * years
    principleIncrease <- (1 + monthly_rate)^periods
    paymentDecrease <- (principleIncrease - 1)/monthly_rate
    return((amount * principleIncrease)/paymentDecrease)
}


rate_chart <- crossing(rate = c(0.1, 0.3), Quartile_Rank = c(1, 2, 3, 4), term = c(1,2,3))

# amort.df2 %>% filter(Expenditure_Type == "Non-lighting")

Figure_E16a <- amort.df2 %>% ungroup() %>%
    filter(Expenditure_Type == "Non-lighting") %>%
    dplyr::select(Quartile_Rank, Quartile_SEE) %>%
    as_tibble() %>% left_join(rate_chart) %>%
    mutate(monthly_payment = amortFun(amount = 1400, apr = rate, years = term)) %>%
    mutate(monthly_expenditure = monthly_payment + Quartile_SEE) %>%
    dplyr::select(-Quartile_SEE, -monthly_payment) %>%
    mutate(term = case_when(term == 1 ~ "One Year",
                            term == 2 ~ "Two Years",
                            TRUE ~ "Three Years"),
           Quartile = case_when(Quartile_Rank == 4 ~ "75th-99th\n(10468 INR)",
                                Quartile_Rank == 3 ~ "50th-75th\n(5417 INR)",
                                Quartile_Rank == 2 ~ "25th-50th\n(3693 INR)",
                                TRUE ~ "0th-25th\n(2422 INR)"),
           Rate = ifelse(rate == .1, "10% p.a.", "30% p.a."))  %>%
    rename(Term = term) %>%
    dplyr::select(-rate, -Quartile_Rank) %>%
    mutate(Quartile = factor(Quartile, levels = c("0th-25th\n(2422 INR)", "25th-50th\n(3693 INR)", "50th-75th\n(5417 INR)", "75th-99th\n(10468 INR)")), Term = factor(Term, levels = c("One Year", "Two Years", "Three Years"))) %>%
    ggplot(aes(x = Term, y = monthly_expenditure)) + geom_col(width = .4, aes(fill = (monthly_expenditure<0))) + theme_tufte() + scale_fill_grey(start = .3, end = .6) + facet_grid(Rate ~ Quartile) + theme(axis.text.x = element_text(angle = 40, hjust = 1), legend.position = "none") + xlab("Lifetime / Loan Term") + ylab("Net monthly \nlighting expenditures\n(INR)")


Figure_E16b <- amort.df2 %>% ungroup() %>%
    filter(Expenditure_Type == "Lighting") %>%
    dplyr::select(Quartile_Rank, Quartile_SEE) %>%
    as_tibble() %>% left_join(rate_chart) %>%
    mutate(monthly_payment = amortFun(amount = 1400, apr = rate, years = term)) %>%
    mutate(monthly_expenditure = monthly_payment + Quartile_SEE) %>%
    dplyr::select(-Quartile_SEE, -monthly_payment) %>%
    mutate(term = case_when(term == 1 ~ "One Year", term == 2 ~ "Two Years", TRUE ~ "Three Years"), Quartile = case_when(Quartile_Rank == 4 ~ "75th-99th\n(185 INR)", Quartile_Rank == 3 ~ "50th-75th\n(108 INR)", Quartile_Rank == 2 ~ "25th-50th\n(99 INR)", TRUE ~ "0th-25th\n(63.2 INR)"), Rate = ifelse(rate == .1, "10% p.a.", "30% p.a."))  %>%
    rename(Term = term) %>%
    dplyr::select(-rate, -Quartile_Rank) %>%
    mutate(Quartile = factor(Quartile, levels = c("0th-25th\n(63.2 INR)", "25th-50th\n(99 INR)", "50th-75th\n(108 INR)", "75th-99th\n(185 INR)")), Term = factor(Term, levels = c("One Year", "Two Years", "Three Years"))) %>%
    ggplot(aes(x = Term,  y = monthly_expenditure)) + geom_col(width = .4, aes(fill = (monthly_expenditure<0))) + scale_fill_grey(start = .3, end = .6) + theme_tufte() + facet_grid(Rate ~ Quartile) + theme(axis.text.x = element_text(angle = 40, hjust = 1), legend.position = "none") + xlab("Lifetime / Loan Term") + ylab("Net monthly \nlighting expenditures\n(INR)")


##################################################################################
# Saving Tables and Figures
##################################################################################
ggsave("Figure_1.pdf", plot = Figure_1,
       device = "pdf",  width = 7, height = 7,
       units = "in")

ggsave("Figure_4.pdf", plot = Figure_4,
       device = "pdf",  width = 5, height = 5,
       units = "in")

ggsave("Figure_5.pdf", plot = Figure_5,
       device = "pdf",  width = 5, height = 8,
       units = "in")

ggsave("Figure_B6.pdf", plot = Figure_B6,
       device = "pdf",  width = 7, height = 4,
       units = "in")

ggsave("Figure_B7.pdf", plot = Figure_B7,
       device = "pdf",  width = 7, height = 4,
       units = "in")

ggsave("Figure_C8.pdf", plot = Figure_C8,
       device = "pdf",  width = 7, height = 7,
       units = "in")

ggsave("Figure_C9.pdf", plot = Figure_C9,
       device = "pdf",  width = 7, height = 7,
       units = "in")

ggsave("Figure_C10.pdf", plot = Figure_C10,
       device = "pdf",  width = 7, height = 7,
       units = "in")

ggsave("Figure_C11.pdf", plot = Figure_C11,
       device = "pdf",  width = 7, height = 6,
       units = "in")

ggsave("Figure_C12.pdf", plot = Figure_C12,
       device = "pdf",  width = 7, height = 6,
       units = "in")

ggsave("Figure_C13.pdf", plot = Figure_C13,
       device = "pdf",  width = 7, height = 6,
       units = "in")

ggsave("Figure_D14.pdf", plot = Figure_D14,
       device = "pdf",  width = 5, height = 5,
       units = "in")

ggsave("Figure_E15.pdf", plot = Figure_E15,
       device = "pdf",  width = 7, height = 3,
       units = "in")

ggsave("Figure_E16a.pdf", plot = Figure_E16a,
       device = "pdf",  width = 7, height = 3,
       units = "in")

ggsave("Figure_E16b.pdf", plot = Figure_E16b,
       device = "pdf",  width = 7, height = 3,
       units = "in")

# Print Tables 3a, 3b
stargazer(no_robustness_ols_1,
          omit = c("factor\\(hhid\\)"),
          se = hetero_consistent_se_white[1:4],
          covariate.labels = c("Treatment", "Endline"),
          dep.var.labels = no_robustness_names_1,
          font.size = c("footnotesize"),
          out = "Table_3a.tex",
          omit.table.layout = "n",
          dep.var.caption = c("\\textbf{Effects on household expenditures and hours of lighting}"),
          model.numbers = FALSE,  omit.stat = "f",
          p.auto = FALSE,
          float = FALSE)

stargazer(no_robustness_ols_2,
          omit = c("factor\\(hhid\\)"),
          se = hetero_consistent_se_white[5:9],
          covariate.labels = c("Treatment", "Endline"),
          dep.var.labels = no_robustness_names_2,
          font.size = c("footnotesize"),
          out = "Table_3b.tex",
          omit.table.layout = "n",
          dep.var.caption = c("\\textbf{Effects on satisfaction, perception, and time}"),
          model.numbers = FALSE,  omit.stat = "f",
          p = list(no_robustness_pvals_2),
          p.auto = FALSE,
          float = FALSE)

# Print Tables 4a, 4b
stargazer(ols_list_fixed_act, omit = c("factor\\(hhid\\)"),
                      se = hetero_consistent_se_white_act,
                      covariate.labels = c("Treatment", "Endline"),
                      dep.var.labels = dep_vars_names_act,
                      font.size = c("footnotesize"),
                      model.numbers = FALSE,
                      out = "Table_4a.tex",
                      omit.table.layout = "n",
                      dep.var.caption = c("\\textbf{Effects on satisfaction with lighting for leisure activities}"),
                      omit.stat = "f",
                      p = list(pvals_act),
                      p.auto = FALSE,
                      float = FALSE)

stargazer(ols_list_fixed_al, omit = c("factor\\(hhid\\)"),
                      se = hetero_consistent_se_white_al,
                      covariate.labels = c("Treatment", "Endline"),
                      dep.var.labels = dep_vars_names_al,
                      font.size = c("footnotesize"),
                      model.numbers = FALSE,
                      out = "Table_4b.tex",
                      omit.table.layout = "n",
                      dep.var.caption = c("\\textbf{Effects on satisfaction with lighting across various dimensions}"),
                      omit.stat = "f",
                      p = list(pvals_al),
                      p.auto = FALSE,
                      float = FALSE)


# Print Table C5
combined_data_3 %>% dplyr::select(Variable = key2, Mean = avg, `Std. Error` = se, `n` = total, treatmentType, ordering) %>% split(.$treatmentType) %>%
    map(function(x){
            treatmentType <- unique(x$treatmentType)
            t1 <- x %>% arrange(ordering) %>% dplyr::select(-treatmentType, -ordering)
            t2 <- t1 %>% xtable(display = c("d", "s", "f", "f", "d"))
            print(t2, include.rownames = FALSE,
                  file = "Table_C5.tex",
                  append = TRUE, floating = FALSE)
          })


# Print Table D6
stargazer(no_robustness_ols_1_st,
                       omit = c("factor\\(hhid\\)"),
                       se = hetero_consistent_se_white_st[1:4],
                       covariate.labels = c("Treatment", "Endline"),
                       dep.var.labels = no_robustness_names_1_st,
                       font.size = c("footnotesize"),
                       model.numbers = FALSE,
                       out = "Table_D6a.tex",
                       omit.stat = "f",
                       add.lines = list(c("F Statistic (df = 1, 1)",
                                          unlist(lapply(no_robustness_ols_1_st,
                                                        f.stat.function, c("white1"))))),
                       p = list(no_robustness_pvals_1),
                       p.auto = FALSE,
                       float = FALSE,
                       title = "Effect of lanterns on household expenditures and hours of lighting (standardized)")

stargazer(no_robustness_ols_2_st,
                       omit = c("factor\\(hhid\\)"),
                       se = hetero_consistent_se_white_st[5:9],
                       covariate.labels = c("Treatment", "Endline"),
                       dep.var.labels = no_robustness_names_2_st,
                       font.size = c("footnotesize"),
                       model.numbers = FALSE,
                       out = "Table_D6b.tex",
                       omit.stat = "f",
                       add.lines = list(c("F Statistic (df = 1, 1)",
                                          unlist(lapply(no_robustness_ols_2_st,
                                                        f.stat.function, c("white1"))))),
                       p = list(no_robustness_pvals_2),
                       p.auto = FALSE,
                       float = FALSE,
                       title = "Effect of lanterns on satisfaction, perception of solar technology, and allocation of time (standardized)")

# Print Table F7 and F10
stargazer(ols_list_rob, omit = c("factor\\(hhid\\)"),
                      se = hetero_consistent_se_white_rob,
                      covariate.labels = c("Treatment", "Endline"),
                      dep.var.labels = dep_vars_names_rob,
                      font.size = c("footnotesize"),
                      model.numbers = FALSE,
                      out = "Table_F7.tex",
                      omit.stat = "f",
                      add.lines = list(c("F Statistic (df = 1, 1)",
                                         unlist(lapply(ols_list_rob, f.stat.function, c("white1"))))),
                      p = list(pvals_rob),
                      p.auto = FALSE,
                      float = FALSE)

stargazer(ols_list_fixed_act_time, omit = c("factor\\(hhid\\)"),
                       se = hetero_consistent_se_white_act_time,
                       covariate.labels = c("Treatment"),
                       dep.var.labels = dep_vars_names_act_time,
                       font.size = c("footnotesize"),
                       model.numbers = FALSE, 
                       out = "Table_F10.tex",
                       omit.table.layout = "n",
                       dep.var.caption = c("\\textbf{Effects on time spent on leisure activities, segmented by activity}"),
                       omit.stat = "f",
                       p = list(pvals_act_time),
                       p.auto = FALSE,
                       float = FALSE)

